/* 

This dataset cleans the agency ratings data from the SLBFE


*/


clear
set matsize 100
set more off 

* Set directory; 

cd "~/Desktop/migrec_replication/do/";


********************************************************************************
* 1) Create agency ratings - 2012 & 2014
********************************************************************************
	
*********************************************
* 2014 Ratings

* Import excel files
	forval i = 1/5 {
	import excel using "../raw_secure/slbfe_ratings.xlsx", sheet(star`i') first
		replace variable = trim(variable)
		replace variable = lower(variable)
		tempfile star`i'
		save `star`i'' ,replace
		clear
	}
	
	use `star1', clear
	
	forval i = 2/5 {
		append using `star`i'', gen(_m_star`i')
	}
	

* Clean the data 
	replace variable = "valid_upto" if variable == "valid_date" | variable == "valid upto"
	replace variable = "licensee" if variable == "licencee"
	
	split detail if variable == "license_no", parse(")")
	replace detail = detail1 if variable == "license_no"
	drop detail1
	
	g star = 1
	replace star = 2 if _m_star2 == 1 
	replace star = 3 if _m_star3 == 1 
	replace star = 4 if _m_star4 == 1 
	replace star = 5 if _m_star5 == 1 
	drop _m_*

	replace note = "Agency Validation Expired" if note == "< Agency Validation Expired ! >"
	replace note = "License Pending - Applied for Renewal" if note == "< Licence Pending -Applied for Renewal ! >"
	replace note = "" if note == "."
	
	encode note, gen(comment)
	drop note
	bys id: egen note = max(comment)
	drop comment
	la define notelab 1 "Agency Validation Expired" 2 "License Pending - Applied for Renewal"
	la values note  notelab
	
	reshape wide detail, i(id) j(variable) string
	rename detail* *
	
	g year_rating = 2014 
	destring, replace
	rename id uid
	
	la var star "Agency rating"
	la var year_rating "Year rating awarded"
	la var note "Note on agency"

* Save tempfile
	tempfile ratings2014
	save `ratings2014', replace 

*********************************************
* 2012 Ratings

* Import all excel worksheets
	clear

	*local stub power travels tour agen pvtltd recr inter enter gulflimserjob
	
	foreach x in all {
	import excel using "../raw/excel/slbfe_ratings.xlsx", sheet(2012_`x') first
		replace variable = trim(variable)
		replace variable = lower(variable)
		tempfile 2012_`x'
		save `2012_`x'' ,replace
		clear
	}
	
	use `2012_all', clear
	
	*local stub travels tour agen pvtltd recr inter enter gulflimserjob
	*foreach x of local stub {
	*	append using `2012_`x'', gen(_m_`x')
	*}
	
* Clean the data 
	replace variable = "valid_upto" if variable == "valid upto"
	replace variable = "licensee" if variable == "licencee"
	replace detail = trim(detail)
	
	split detail if variable == "license_no", parse(")")
	replace detail = detail1 if variable == "license_no"
	drop detail1
				
	reshape wide detail, i(id) j(variable) string
	rename detail* *
	
	g year_rating = 2012 
	destring, replace
	rename id uid
	
	encode grade, gen(star)
	drop grade
	
	la var star "Agency rating"
	la var year_rating "Year rating awarded"

* save tempfiles
	tempfile ratings2012
	save `ratings2012', replace

*********************************************
* Merge ratings 
	
* combine ratings
	use `ratings2012', clear

		rename * *_2012 
		rename license_no_2012 license_no
		drop year 
			
		merge 1:1 license_no using `ratings2014', gen(_m_star2014)
			
		rename star star_2014
		drop year_rating uid*
		#delimit;
		rename (address agency_name email fax licensee telephone valid_upto note) 
			(address_2014 agency_name_2014 email_2014 fax_2014 licensee_2014 
			telephone_2014 valid_upto_2014 note_2014);
		#delimit cr
			
		order license_no agency_name_2012 agency_name_2014 star_*

	tempfile agency_ratings
	save `agency_ratings', replace 

********************************************************************************
* 2) Merge ratings with agency data 
********************************************************************************
			
* Load agency data
	
	clear 
	use "../dta_secure/agencies.dta"

* Clean to be able to merge
		
	rename an license_no
	rename agency_agencyname agency_name
	rename agency_agencytype agency_type
	
	la var agency_bl_status "Blacklist status" 
	la var agency_bl_date 	"Blacklist date" 
	la var agency_lissuedate "License issue date"
	
* tag duplicate agencies 
	duplicates tag agency_name, gen(agency_dup) //19 duplicates, some with same license year, others with not
	
* issue date
	g agency_license_yr = substr(agency_lissuedate, 7,4)
	g agency_license_month = substr(agency_lissuedate, 4,2)
	bys agency_license_yr: egen agency_count_yr = count(license_no)
	la var agency_count_yr "Number of new licenses issued in this year"
	
* blacklist date
	encode agency_bl_status, gen(agency_ever_bl)
	recode agency_ever_bl (2=1) (.=0)
	g agency_blacklist_yr = substr(agency_bl_date, 7,4)
	g agency_blacklist_month = substr(agency_bl_date, 4,2)

* Merge with ratings data
	
	merge 1:1 license_no using  `agency_ratings', gen(_m_ratings)

* Save dataset 

	save "../dta_secure/agency_ratings", replace 


	
